import pandas as pd
import os
import numpy as np

def get_compas_data(path='data/COMPAS/compas-scores-two-years.csv'):
    """Load the COMPAS two-year CSV and return encoded features and labels.

    Processing steps:

    1. Keep rows whose ``days_b_screening_arrest`` is within +/-30
       (rows where that value is missing are excluded by the comparison).
    2. Use ``is_recid`` as the label ``y``; discard rows where it is ``-1``
       and rows with a missing value in any of the selected columns.
    3. Drop ``age_cat``, ``score_text``, ``v_score_text`` and ``sex`` from
       the features.
    4. Encode ``race`` as 0 for "caucasian" (case-insensitive, surrounding
       whitespace ignored) and 1 otherwise; encode ``c_charge_degree`` as
       1 for "f" (case-insensitive) and 0 otherwise.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or file-like
            object). Defaults to the project's bundled COMPAS CSV location.

    Returns:
        A ``(data_x, data_y)`` tuple. ``data_x`` is a DataFrame with columns
        ``age``, ``race``, ``juveline_felony_count``,
        ``juvenile_misdimeanor_count``, ``juvenile_other_count``,
        ``priors_count`` and ``charge_degree``; ``data_y`` is the Series
        ``y``. Both share a fresh 0..n-1 index.
    """
    selected_cols = [
        'sex', 'age', 'age_cat', 'race', 'juv_fel_count', 'juv_misd_count',
        'juv_other_count', 'priors_count', 'c_charge_degree', 'score_text',
        'v_score_text', 'is_recid',
    ]

    raw = pd.read_csv(path)
    within_window = raw['days_b_screening_arrest'].abs() <= 30

    # Select and rename in one step so we work on a new frame rather than
    # writing into a slice of ``raw`` (avoids SettingWithCopyWarning).
    df = raw.loc[within_window, selected_cols].rename(columns={'is_recid': 'y'})

    # The cleaned frame must be assigned back: drop/dropna/reset_index all
    # return new objects, so a bare expression statement has no effect.
    df = df[df['y'] != -1].dropna().reset_index(drop=True)

    data_x = df.drop(columns=['y', 'age_cat', 'score_text', 'v_score_text', 'sex'])

    # 1 for non-white, 0 for white
    normalized_race = data_x['race'].str.strip().str.lower()
    data_x['race'] = normalized_race.ne('caucasian').astype('int64')

    # 0 for misdemeanor, 1 for felony
    data_x['c_charge_degree'] = (
        data_x['c_charge_degree'].str.lower().eq('f').astype('int64')
    )

    # Rename by name rather than by position so a change in column order
    # cannot silently mislabel features. The output spellings are kept
    # exactly as before because callers may index by these names.
    data_x = data_x.rename(columns={
        'juv_fel_count': 'juveline_felony_count',
        'juv_misd_count': 'juvenile_misdimeanor_count',
        'juv_other_count': 'juvenile_other_count',
        'c_charge_degree': 'charge_degree',
    })

    data_y = df['y']

    return data_x, data_y